* data prep watershed level estimates

* data starts at .05 degree level and aggregated to .5 degree to merge
*** then aggregated to pfaf4 level and merged with pfaf4 characteristics 

* follows:
* read_lights_raster.do (remote sensed droughts and lights), 
* read_ascii_cru.do (reads CRU sc-PDSI, temp and precip)
* read_other_rasters.do (to read in cross-sectional rasters) 
* read_pfaf4char.do (creates pfaf4_char.dta) --- dams and other pfaf4 characteristics

* preceeds reg_pfaf4.do

log using merge_pfaf4, replace
timer on 1


** start with .05 degree data
** get pfa4 level data for these 
use "../data/grid_lights_drought.dta", clear
sort x y year

* merge cross-sectional data that's at .05 degree cell level
* merge cross-sectional data that's at .05 degree cell level
merge m:1 x y using "../data/flares.dta"
drop if _merge==2
gen flare=(detection_frequency_2012>0) & _merge==3
drop _merge

summarize lights if flare==1, detail
replace lights=. if flare
replace ihslights=. if flare


*drop flare cells in the same manner as in cell-level data
gen x_10=floor((x-1)/10) + 1
gen y_10=floor((y-1)/10) + 1 
bys x_10 y_10: egen flaremin=min(flare)
replace lights=. if flaremin>0
replace ihslights=. if flaremin>0


merge m:1 x y using "../data/pfaf4_grid.dta"
drop _merge

collapse (mean) lights ihslights drought (count) cellsinpfaf4=lights, by(conpfaf4 year)


label var cellsinpfaf4 "Num 0.05 degree cells in pfa4"

tempfile fine

save `fine', replace

* now get .5 degreee data
use  "../data/pfaf4_grid.dta", clear

gen x_10=floor((x-1)/10) + 1
gen y_10=floor((y-1)/10) + 1 

*pick pfaf4 that is most common in .5 degree grid cell
bys x_10 y_10: egen pfaf4mode=mode(conpfaf4), maxmode 

collapse (firstnm) conpfaf4=pfaf4mode, by(x_10 y_10)


rename x_10 x
rename y_10 y

sort x y


* add temp data at half degree level
merge 1:m x y using "../data/annual_av_tmp.dta"
drop if _merge==2
drop _merge

sort y x year

** merge sc-PDSI data
merge 1:1 y x year using "../data/pdsi.dta", sorted
drop if _merge==2
drop _merge


* add WB groundwater data

* use WB definition of center of cells
gen lon=-179.75+.5*(x-1)
gen lat=74.75-.5*(y-1)


label var lon "Logitude (midpoint)"
label var lat "Latitude (midpoint)"


sort lon lat
merge m:1 lon lat using "../data/aqtyp_gwresource_grid05deg.dta"
drop if _merge==2
drop _merge



*** now collapse to pfaf4 level

sort conpfaf4 year

collapse (mean) pdsi* *tmp aqtyp_pct* resource*, by(conpfaf4 year)


** combine with more disaggreagated data

merge 1:1 conpfaf4 year using `fine'
drop if _merge==1
drop _merge

* add dams data from spatial join
merge m:1 conpfaf4 using "../data/pfaf4_char.dta"
drop if _merge==2
drop _merge



*create information on lights by pfaf4 over time so can exclude unpopulated places
bysort conpfaf4: egen minlights=min(lights)
 
label var minlights "Min (over time) lights for subbasin" 

drop if minlights==0 | missing(minlights)


** all labels after collapse

label var lights "Light index"
label var drought "Remote-sensed DSI"
label var conpfaf4 "Pfaf4, unique by continent"
label var tmp "Annual av temp"
label var dev_tmp "Temp anomaly"


*11 bins
generate dsi_cat_temp=recode(drought,-1.5,-1.2,-.9,-.6,-.3,.3,.6,.9,1.2,1.5,4)
egen byte dsi_cat11=group(dsi_cat_temp)
drop dsi_cat_temp
*7 bins
gen dsi_cat7=dsi_cat11
replace dsi_cat7=7 if dsi_cat11>=7
gen extsev=(dsi_cat11<=2)
gen extreme=(dsi_cat11==1)



label define cat11 1 "Extreme drought"  2 "Severe drought" 3 "Moderate drought" 4 "Mild drought" ///
5 "Incipient drought" 6 "Near normal" 7 "Incipient wet spell"  8 "Slightly wet" 9 "Moderately wet" ///
10 "Very wet"  11 "Extremely wet"
label values dsi_cat11 cat11
label define cat7 1 "Extreme drought"  2 "Severe drought" 3 "Moderate drought" 4 "Mild drought" ///
5 "Incipient drought" 6 "Near normal" 7 "Wet"  
label values dsi_cat7 cat7
label var dsi_cat11 "DSI, 11 categories"
label var dsi_cat7 "DSI, 7 categories"
label var extsev "Extreme or severe drought"

label var aqtyp_pct_ma "Major alluvial (%)" 
label var aqtyp_pct_cx "Complex aquifer (%)" 
label var aqtyp_pct_kt  "Karstic (%)"
label var aqtyp_pct_ls "Local/shallow aquifer (%)"
label var aqtyp_pct_NA "No aquifer data (%)" 
label var resource_pct_grid_na "Not covered by country-lithological-outcrop resource data (% of grid cell)"
label var resource "GW resource, sum w/in cell (10^9 m3/yr)" 
label var resource_norm "GW resource, normalized (10^9 m3/yr)"


*using definitions in van der Schrier article
generate pdsi_cat_temp=recode(pdsi_av,-4,-3,-2,-1,-.5,.5,1,2,3,4,30)
gen pdsi_summer_cat_temp=recode(pdsi_summer,-4,-3,-2,-1,-.5,.5,1,2,3,4,30)
gen pdsi_min_cat_temp=recode(pdsi_summer,-4,-3,-2,-1,-.5,.5,1,2,3,4,30)

*matching distribution of cut points in remote sensed DSI
*_pctile pdsi_av, percentiles(4, 8, 15, 25, 36, 63, 75, 85, 92, 97)
*generate pdsi_cat_temp=recode(pdsi_av,`r(r1)',`r(r2)',`r(r3)',`r(r4)',`r(r5)',`r(r6)',`r(r7)',`r(r8)',`r(r9)',`r(r10)',30)
egen byte pdsi_cat=group(pdsi_cat_temp)
egen byte pdsi_summer_cat=group(pdsi_summer_cat_temp)
egen byte pdsi_min_cat=group(pdsi_min_cat_temp)

drop pdsi_cat_*temp 

label var pdsi_cat "sc-PDSI categories"
label var pdsi_summer_cat "sc-PDSI categories in June or Dec"
label var pdsi_min_cat "sc-PDSI categories at annual min"

label define pcat 1 "Extremely dry"  2 "Severely dry" 3 "Moderately dry" 4 "Mildly dry" ///
5 "Incipient drought" 6 "Near normal" 7 "Incipient wet spell"  8 "Slightly wet" 9 "Moderately wet" ///
10 "Very wet"  11 "Extremely wet"
label values pdsi_cat pcat
label values pdsi_summer_cat pcat
label values pdsi_min_cat pcat


compress

label data "Data at the Pfaf4 (not grid cell) level"


save for_reg_pfaf4, replace

timer off 1
timer list 1
log close

timer clear 1
